Load in packages and data

## Acquire demographic data on tennis players

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(stringr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Read the ATP player file straight from the JeffSackmann/tennis_atp
# GitHub repository into a base data.frame.
demographics <- read.csv(
  file = "https://raw.githubusercontent.com/JeffSackmann/tennis_atp/master/atp_players.csv"
)

Clean up some variable names.

# Swap the raw lower-case column names for title-case ones.
# rename() takes new_name = old_name pairs and leaves column order and
# every unlisted column untouched.
demographics <- rename(
  demographics,
  Player_Id = player_id,
  First_Name = name_first,
  Last_Name = name_last,
  DOB = dob,
  Country = ioc,
  Height = height,
  Wikidata_Id = wikidata_id
)

Plot the country demographics of ATP men's players.

# Bar chart of each country's share of the rows in `demographics`.
#   count(Country)  -> one row per country code with its row count `n`
#   Prop            -> that count as a fraction of all rows
#   Percentage      -> Prop scaled to 0-100 and rounded to two decimals
country_plot <- demographics %>%
  count(Country) %>%
  mutate(Prop = n / sum(n),
         Percentage = round(Prop * 100, 2)) %>%
  ggplot(aes(x = Country, y = Percentage, fill = Country)) +
  # geom_col() draws bar heights directly from the y aesthetic; it is the
  # idiomatic spelling of geom_bar(stat = "identity").
  geom_col() +
  coord_cartesian(clip = "off") +
  # Only the x tick labels are hidden (presumably because there are too many
  # country codes to print legibly). The axis titles are deliberately NOT
  # blanked: doing so would silently discard the labs() titles set below.
  theme(axis.text.x = element_blank()) +
  ggtitle("Country breakdown of Players via Country affiliation") +
  labs(x = "Country", y = "Percentage")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(country_plot)

Notes about the variables

Height is in centimeters (cm).